home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
NeXTSTEP 3.3 (Developer)…68k, x86, SPARC, PA-RISC]
/
NeXTSTEP 3.3 Dev Intel.iso
/
NextDeveloper
/
Headers
/
indexing
/
IXAttributeReader.h
< prev
next >
Wrap
Text File
|
1994-04-13
|
3KB
|
79 lines
/*
IXAttributeReader.h
Copyright 1991, NeXT Computer, Inc.
*/
#import <objc/Object.h>
#import <objc/hashtable.h>
@class List;
// This protocol is adopted by subclasses that wish to perform special case
// folding or lexeme extraction. The IXJapaneseLexer in the Kanji version of
// the library is a case in point, since the Kanji encodings involve shifting
// between one- and two-byte character representations.
@protocol IXLexemeExtraction

// Case-folding hook. Receives a character buffer and a length.
// NOTE(review): this header does not say what the unsigned result means
// (presumably the length of the folded text) -- confirm against the
// Indexing Kit documentation.
- (unsigned)foldCase:(char *)string
            inLength:(unsigned)length;

// Lexeme-extraction hook. Receives a character buffer, a length, and the
// stream being read.
// NOTE(review): presumably the next lexeme is copied from the stream into
// the buffer and its length is returned -- confirm; not stated here.
- (unsigned)getLexeme:(char *)string
             inLength:(unsigned)length
           fromStream:(NXStream *)stream;

@end
@interface IXAttributeReader : Object
{
    // Words held in this table are removed from the output.
    NXHashTable *stopWords;

    // Characters that delimit words.
    const char *punctuation;

    // Table used for mapping characters.
    unsigned char *charMapping;

    // One-bit switches, one per optional processing step.
    struct {
        // Fold upper case to lower case.
        unsigned caseFolding:1;
        // Fold plural forms to the singular form.
        unsigned pluralFolding:1;
        // Reduce words to their stems.
        unsigned stemsReduced:1;
        // Unique the tokens so that the output is packed.
        unsigned tokenUniquing:1;
    } booleanOptions;
}

// Analyzes a stream and returns the result, in Attribute Reader Format,
// as a stream.
- (NXStream *)analyzeStream:(NXStream *)stream;

// Plural-folding and stem-reduction entry points. Each receives a character
// buffer and a length.
// NOTE(review): the meaning of the unsigned result is not stated in this
// header (presumably the resulting length) -- confirm.
- (unsigned)foldPlural:(char *)string
              inLength:(unsigned)length;
- (unsigned)reduceStem:(char *)string
              inLength:(unsigned)length;

@end
@interface IXAttributeReader (Configuration)

// --- Case folding ---------------------------------------------------------

// True if case folding is enabled.
- (BOOL)isCaseFolded;
// Enables or disables case folding.
- setCaseFolded:(BOOL)flag;

// --- Plural folding -------------------------------------------------------

// True if plural folding is enabled.
- (BOOL)arePluralsFolded;
// Enables or disables plural folding.
- setPluralsFolded:(BOOL)flag;

// --- Stem reduction -------------------------------------------------------

// True if stem removal is enabled.
- (BOOL)areStemsReduced;
// Enables or disables stem removal.
- setStemsReduced:(BOOL)flag;

// --- Token delimiters -----------------------------------------------------

// Returns the currently defined token delimiters.
- (char *)punctuation;
// Sets the token delimiters.
- setPunctuation:(const char *)string;

// --- Stop words -----------------------------------------------------------

// Returns the stop words as a newline-delimited string.
- (char *)stopWords;
// Sets the stop words.
- setStopWords:(const char *)string;

// Reads stop words from a stream.
- readStopWords:(NXStream *)stream;
// Writes stop words to a stream.
- writeStopWords:(NXStream *)stream;

// Reads stop words from a file.
- readStopWordsFromFile:(const char *)filename;
// Writes stop words to a file.
- writeStopWordsToFile:(const char *)filename;

@end
// The following protocol is obsolete, and may not be defined in future
// releases. The methods are now declared by the classes that implement them.
@protocol IXAttributeReading

// Takes a stream and returns a stream. The signature matches the
// -analyzeStream: method declared on IXAttributeReader.
- (NXStream *)analyzeStream:(NXStream *)stream;

@end